/*==============================================================================
FILE NAME: Create_air_complaints.do
INPUTS: incidents.dta
OUTPUTS: area complaints.dta, area air complaints.dta
CREATED: 26 September 2023
==============================================================================*/

//create TCEQ area-by-month panel of complaints
set more off

/* Set directory if working independently through code
if c(username)=="" { //insert username
	global rootdir "" // insert root path
	global processed_data "$rootdir/processed_data"  // Define global paths for replication package
} 
*/

//create monthly count of complaints by TCEQ region
use "$processed_data/incidents.dta", clear
keep ComplaintIncident TCEQRegion IncidentRecDate IncidentStatus
duplicates drop
isid ComplaintIncident
gen temp = date(IncidentRecDate,"MDY")
drop IncidentRecDate 
rename temp IncidentRecDate
format IncidentRecDate %td
replace IncidentRecDate = . if IncidentRecDate < 0
gen year=year(IncidentRecDate)
gen month=month(IncidentRecDate)
gen mdate=ym(year,month)
format mdate %tm
keep if mdate>=tm(2003m1) & mdate<=tm(2019m12)
tab IncidentStatus
drop if IncidentStatus=="REFERRED"
//15% of complaints are referred outside TCEQ; drop these
drop year month IncidentRecDate ComplaintIncident IncidentStatus
gen complaint=1
sort TCEQRegion
egen region_id=group(TCEQRegion)
label var region_id "numeric identifier for TCEQRegion"
gen area_id=.
replace area_id=1 if region_id==6 | region_id==7 | region_id==15 | region_id==16
replace area_id=2 if region_id==9 | region_id==11 | region_id==13
replace area_id=3 if region_id==5 | region_id==10 | region_id==12 | region_id==14
replace area_id=4 if region_id==1 | region_id==2 | region_id==3 | region_id==4 | region_id==8
label define area 1 "Border & Permian Basin" 2 "Central" 3 "Coastal & East" 4 "North Central & West"
label values area_id area
drop TCEQRegion region_id
sort area_id mdate
collapse (sum) complaint, by(area_id mdate)
rename complaint area_complaint
label var area_complaint "# complaints in area-month"
sort area_id mdate
save "$processed_data/area complaints.dta", replace

//create monthly count of air complaints by area
use "$processed_data/incidents.dta", clear
keep if Media=="AIR"
keep ComplaintIncident TCEQRegion IncidentRecDate IncidentStatus
duplicates drop
isid ComplaintIncident
gen temp = date(IncidentRecDate,"MDY")
drop IncidentRecDate 
rename temp IncidentRecDate
format IncidentRecDate %td
replace IncidentRecDate = . if IncidentRecDate < 0
gen year=year(IncidentRecDate)
gen month=month(IncidentRecDate)
gen mdate=ym(year,month)
format mdate %tm
keep if mdate>=tm(2003m1) & mdate<=tm(2019m12)
tab IncidentStatus
drop if IncidentStatus=="REFERRED"
//almost 6% of air complaints are referred outside TCEQ; drop these
drop year month IncidentRecDate ComplaintIncident IncidentStatus
gen air_complaint=1
sort TCEQRegion
egen region_id=group(TCEQRegion)
label var region_id "numeric identifier for TCEQRegion"
gen area_id=.
replace area_id=1 if region_id==6 | region_id==7 | region_id==15 | region_id==16
replace area_id=2 if region_id==9 | region_id==11 | region_id==13
replace area_id=3 if region_id==5 | region_id==10 | region_id==12 | region_id==14
replace area_id=4 if region_id==1 | region_id==2 | region_id==3 | region_id==4 | region_id==8
label define area 1 "Border & Permian Basin" 2 "Central" 3 "Coastal & East" 4 "North Central & West"
label values area_id area
drop TCEQRegion region_id
sort area_id mdate
collapse (sum) air_complaint, by(area_id mdate)
rename air_complaint area_air_complaint
label var area_air_complaint "# air complaints in area-month"
sort area_id mdate
save "$processed_data/area air complaints.dta", replace